# -*- coding: utf-8 -*-
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#作者：cacho_37967865
#博客：https://blog.csdn.net/sinat_37967865
#文件：stop_words.py
#日期：2019-09-09
#备注：一些常见的停用词表，此外还有txt文件处理需要过滤的词
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

def get_stopwords(path: str = r"F:\PythonProject\Python\file\stopwords.txt") -> list:
    """Load the stop-word list as a flat (one-dimensional) list of strings.

    The original author's note describes these entries as text to skip at
    the beginning; how they are applied is up to the caller.

    Args:
        path: Location of the UTF-8 text file holding one entry per line.
            Defaults to the path that was previously hard-coded here.

    Returns:
        Every line of the file with surrounding whitespace stripped, in file
        order. Lines that are empty after stripping are omitted.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as fp:
        # Iterate the file object directly so lines are read lazily instead
        # of being materialised up front by readlines().
        stripped = (raw.strip() for raw in fp)
        return [entry for entry in stripped if entry]


def get_txtWords(path: str = r"F:\PythonProject\Python\file\txtWords.txt") -> list:
    """Load the replacement table as a two-dimensional list.

    The original author's note labels this data as "replacement content";
    the meaning of each column is not defined in this function.

    Args:
        path: Location of the UTF-8 text file holding one '|'-separated
            record per line. Defaults to the path that was previously
            hard-coded here.

    Returns:
        One inner list per non-blank line, in file order. Each inner list
        holds the fields obtained by splitting the stripped line on '|'.
        Only the line as a whole is stripped; whitespace around individual
        fields is preserved.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as fp:
        # Iterate the file object directly so lines are read lazily instead
        # of being materialised up front by readlines().
        stripped = (raw.strip() for raw in fp)
        return [entry.split("|") for entry in stripped if entry]


def get_alphaWords(path: str = r"F:\PythonProject\Python\file\alphaWords.txt") -> list:
    """Load the letter / special-character entries as a flat list of strings.

    The original author's note says these entries relate to lines that
    contain letters or special characters; how they are applied is up to
    the caller.

    Args:
        path: Location of the UTF-8 text file holding one entry per line.
            Defaults to the path that was previously hard-coded here.

    Returns:
        Every line of the file with surrounding whitespace stripped, in file
        order. Lines that are empty after stripping are omitted.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as fp:
        # Iterate the file object directly so lines are read lazily instead
        # of being materialised up front by readlines().
        stripped = (raw.strip() for raw in fp)
        return [entry for entry in stripped if entry]


def get_skipWords(path: str = r"F:\PythonProject\Python\file\skipWords.txt") -> list:
    """Load the skip-word entries as a flat list of strings.

    The original author's note says these are characters to remove when
    they appear at the start of a line; how they are applied is up to the
    caller.

    Args:
        path: Location of the UTF-8 text file holding one entry per line.
            Defaults to the path that was previously hard-coded here.

    Returns:
        Every line of the file with surrounding whitespace stripped, in file
        order. Lines that are empty after stripping are omitted.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(path, "r", encoding="utf-8") as fp:
        # Iterate the file object directly so lines are read lazily instead
        # of being materialised up front by readlines().
        stripped = (raw.strip() for raw in fp)
        return [entry for entry in stripped if entry]


def get_keywords(path: str = r"F:\PythonProject\Python\file\keyWords.txt") -> list:
    """Load the keyword table as a two-dimensional list.

    The original author's note says these records relate to lines that
    contain a keyword; the meaning of each column is not defined in this
    function.

    Args:
        path: Location of the UTF-8 text file holding one '|'-separated
            record per line. Defaults to the path that was previously
            hard-coded here.

    Returns:
        One inner list per record, in file order. Each inner list holds the
        fields obtained by splitting the stripped line on '|'. Blank lines
        and lines whose first non-whitespace character is '#' are skipped.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    records = []
    with open(path, "r", encoding="utf-8") as fp:
        # Iterate the file object directly so lines are read lazily instead
        # of being materialised up front by readlines().
        for raw in fp:
            entry = raw.strip()
            # '#' marks a comment line; the check runs after stripping, so
            # indented comments are ignored as well.
            if not entry or entry.startswith("#"):
                continue
            records.append(entry.split("|"))
    return records


def get_stocks(path: str = r"F:\PythonProject\Python\file\stocks.txt") -> list:
    """Load the stock records as a two-dimensional list.

    The original author's note describes this as fetching the data of a
    table; the meaning of each column is not defined in this function.

    Args:
        path: Location of the UTF-8 text file holding one '|'-separated
            record per line. Defaults to the path that was previously
            hard-coded here.

    Returns:
        One inner list per record, in file order. Each inner list holds the
        fields obtained by splitting the stripped line on '|'. Blank lines
        and lines whose first non-whitespace character is '#' are skipped.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    records = []
    with open(path, "r", encoding="utf-8") as fp:
        # Iterate the file object directly so lines are read lazily instead
        # of being materialised up front by readlines().
        for raw in fp:
            entry = raw.strip()
            # '#' marks a comment line; the check runs after stripping, so
            # indented comments are ignored as well.
            if not entry or entry.startswith("#"):
                continue
            records.append(entry.split("|"))
    return records


if __name__ == "__main__":
    # Manual smoke test: running this file directly loads the skip-word list
    # and discards the result, so a missing or unreadable file surfaces as an
    # error. Other loaders that can be swapped in here for a quick check:
    # get_stopwords(), get_txtWords(), get_alphaWords(), get_keywords().
    get_skipWords()